// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.0 %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t.0 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.1 %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t.1 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4

// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.2 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t.2 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK7
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.3 %s
// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t.3 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=CHECK8

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK9
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.4 %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -std=c++11 -include-pch %t.4 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=CHECK10

// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.5 %s
// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t.5 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

float flag;
int main (int argc, char **argv) {
#pragma omp parallel
{
#pragma omp cancel parallel if(flag)
  argv[0][0] = argc;
#pragma omp barrier
  argv[0][0] += argc;
}
#pragma omp sections
{
#pragma omp cancel sections
}
#pragma omp sections
{
#pragma omp cancel sections
#pragma omp section
  {
#pragma omp cancel sections
  }
}
#pragma omp for
for (int i = 0; i < argc; ++i) {
#pragma omp cancel for if(cancel: flag)
}
#pragma omp task
{
#pragma omp cancel taskgroup
}
#pragma omp parallel sections
{
#pragma omp cancel sections
}
#pragma omp parallel sections
{
#pragma omp cancel sections
#pragma omp section
  {
#pragma omp cancel sections
  }
}
int r = 0;
#pragma omp parallel for reduction(+: r)
for (int i = 0; i < argc; ++i) {
#pragma omp cancel for
  r += i;
}
  return argc;
}

#endif
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_LB_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_UB_2:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_ST_3:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IL_4:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IV_5:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I24:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK1-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK1-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]])
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
// CHECK1-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
// CHECK1-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK1-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.sections.case:
// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK1-NEXT:    br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK1:       .cancel.exit:
// CHECK1-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK1:       .cancel.continue:
// CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK1:       .omp.sections.exit:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK1:       cancel.cont:
// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK1-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
// CHECK1-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
// CHECK1-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK1-NEXT:    store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND6:%.*]]
// CHECK1:       omp.inner.for.cond6:
// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK1-NEXT:    [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
// CHECK1-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
// CHECK1:       omp.inner.for.body8:
// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK1-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [
// CHECK1-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]]
// CHECK1-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.sections.case9:
// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK1-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]]
// CHECK1:       .cancel.exit10:
// CHECK1-NEXT:    br label [[CANCEL_EXIT19:%.*]]
// CHECK1:       cancel.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NEXT:    br label [[CANCEL_CONT]]
// CHECK1:       .cancel.continue11:
// CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK1:       .omp.sections.case12:
// CHECK1-NEXT:    [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK1-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK1-NEXT:    br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]]
// CHECK1:       .cancel.exit13:
// CHECK1-NEXT:    br label [[CANCEL_EXIT19]]
// CHECK1:       .cancel.continue14:
// CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK1:       .omp.sections.exit15:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC16:%.*]]
// CHECK1:       omp.inner.for.inc16:
// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK1-NEXT:    [[INC17:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK1-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND6]]
// CHECK1:       omp.inner.for.end18:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NEXT:    br label [[CANCEL_CONT20:%.*]]
// CHECK1:       cancel.cont20:
// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK1-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0
// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT:    [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK1-NEXT:    store i32 [[SUB22]], i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT:    [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]]
// CHECK1-NEXT:    br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1:       omp.precond.then:
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK1-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK1-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
// CHECK1-NEXT:    br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1:       cond.true:
// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK1-NEXT:    br label [[COND_END:%.*]]
// CHECK1:       cond.false:
// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    br label [[COND_END]]
// CHECK1:       cond.end:
// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ]
// CHECK1-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND26:%.*]]
// CHECK1:       omp.inner.for.cond26:
// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]]
// CHECK1-NEXT:    br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]]
// CHECK1:       omp.inner.for.body28:
// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[I24]], align 4
// CHECK1-NEXT:    [[TMP35:%.*]] = load float, float* @flag, align 4
// CHECK1-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00
// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1:       omp_if.then:
// CHECK1-NEXT:    [[TMP36:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
// CHECK1-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK1-NEXT:    br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]]
// CHECK1:       .cancel.exit29:
// CHECK1-NEXT:    br label [[CANCEL_EXIT34:%.*]]
// CHECK1:       cancel.exit19:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NEXT:    br label [[CANCEL_CONT20]]
// CHECK1:       .cancel.continue30:
// CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK1:       omp_if.else:
// CHECK1-NEXT:    br label [[OMP_IF_END]]
// CHECK1:       omp_if.end:
// CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1:       omp.body.continue:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC31:%.*]]
// CHECK1:       omp.inner.for.inc31:
// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1
// CHECK1-NEXT:    store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND26]]
// CHECK1:       omp.inner.for.end33:
// CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1:       omp.loop.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK1:       cancel.exit34:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK1-NEXT:    br label [[CANCEL_CONT35:%.*]]
// CHECK1:       omp.precond.end:
// CHECK1-NEXT:    br label [[CANCEL_CONT35]]
// CHECK1:       cancel.cont35:
// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK1-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK1-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP42:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP39]])
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
// CHECK1-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK1-NEXT:    ret i32 [[TMP43]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8***, align 8
// CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8
// CHECK1-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
// CHECK1-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1:       omp_if.then:
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK1-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1)
// CHECK1-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK1-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK1:       .cancel.exit:
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
// CHECK1-NEXT:    br label [[RETURN:%.*]]
// CHECK1:       .cancel.continue:
// CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK1:       omp_if.else:
// CHECK1-NEXT:    br label [[OMP_IF_END]]
// CHECK1:       omp_if.end:
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK1-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
// CHECK1-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
// CHECK1-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
// CHECK1-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
// CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
// CHECK1-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK1-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK1:       .cancel.exit2:
// CHECK1-NEXT:    br label [[RETURN]]
// CHECK1:       .cancel.continue3:
// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK1-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
// CHECK1-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
// CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
// CHECK1-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
// CHECK1-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
// CHECK1-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
// CHECK1-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
// CHECK1-NEXT:    br label [[RETURN]]
// CHECK1:       return:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK1-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK1-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK1-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK1-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK1-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK1-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK1-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK1-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK1-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK1-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK1-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK1-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK1-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK1-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK1:       .cancel.exit.i:
// CHECK1-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
// CHECK1:       .cancel.continue.i:
// CHECK1-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT]]
// CHECK1:       .omp_outlined..1.exit:
// CHECK1-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK1-NEXT:    ret i32 0
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
// CHECK1-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK1-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK1-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.sections.case:
// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK1-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK1-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK1:       .cancel.exit:
// CHECK1-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK1:       .cancel.continue:
// CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK1:       .omp.sections.exit:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK1:       cancel.cont:
// CHECK1-NEXT:    ret void
// CHECK1:       cancel.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK1-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
// CHECK1-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK1-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK1-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK1-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.sections.case:
// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK1-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK1-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK1:       .cancel.exit:
// CHECK1-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK1:       .cancel.continue:
// CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK1:       .omp.sections.case1:
// CHECK1-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK1-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK1-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK1:       .cancel.exit2:
// CHECK1-NEXT:    br label [[CANCEL_EXIT]]
// CHECK1:       .cancel.continue3:
// CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK1:       .omp.sections.exit:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK1:       cancel.cont:
// CHECK1-NEXT:    ret void
// CHECK1:       cancel.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK1-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK1-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK1-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK1-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1:       omp.precond.then:
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1:       cond.true:
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT:    br label [[COND_END:%.*]]
// CHECK1:       cond.false:
// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    br label [[COND_END]]
// CHECK1:       cond.end:
// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
// CHECK1-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK1-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 2)
// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK1-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK1:       .cancel.exit:
// CHECK1-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK1:       .cancel.continue:
// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[I4]], align 4
// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, i32* [[R3]], align 4
// CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
// CHECK1-NEXT:    store i32 [[ADD7]], i32* [[R3]], align 4
// CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1:       omp.body.continue:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK1-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1:       omp.loop.exit:
// CHECK1-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK1-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
// CHECK1-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4
// CHECK1-NEXT:    [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK1-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.reduction.case1:
// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, i32* [[R3]], align 4
// CHECK1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
// CHECK1-NEXT:    store i32 [[ADD9]], i32* [[TMP1]], align 4
// CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       cancel.exit:
// CHECK1-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
// CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK1:       .omp.reduction.case2:
// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
// CHECK1-NEXT:    [[TMP36:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP35]] monotonic, align 4
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.default:
// CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK1:       omp.precond.end:
// CHECK1-NEXT:    br label [[CANCEL_CONT]]
// CHECK1:       cancel.cont:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK1-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@main
// CHECK2-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_LB_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_UB_2:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_ST_3:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IL_4:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IV_5:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[I24:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK2-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK2-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]])
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
// CHECK2-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2:       omp.inner.for.cond:
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2:       omp.inner.for.body:
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK2-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK2-NEXT:    ]
// CHECK2:       .omp.sections.case:
// CHECK2-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK2-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK2-NEXT:    br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK2:       .cancel.exit:
// CHECK2-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK2:       .cancel.continue:
// CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK2:       .omp.sections.exit:
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2:       omp.inner.for.inc:
// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK2:       omp.inner.for.end:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK2:       cancel.cont:
// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK2-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
// CHECK2-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
// CHECK2-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK2-NEXT:    store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND6:%.*]]
// CHECK2:       omp.inner.for.cond6:
// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK2-NEXT:    [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
// CHECK2-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
// CHECK2:       omp.inner.for.body8:
// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK2-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [
// CHECK2-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]]
// CHECK2-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]]
// CHECK2-NEXT:    ]
// CHECK2:       .omp.sections.case9:
// CHECK2-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK2-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK2-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]]
// CHECK2:       .cancel.exit10:
// CHECK2-NEXT:    br label [[CANCEL_EXIT19:%.*]]
// CHECK2:       cancel.exit:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NEXT:    br label [[CANCEL_CONT]]
// CHECK2:       .cancel.continue11:
// CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK2:       .omp.sections.case12:
// CHECK2-NEXT:    [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK2-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK2-NEXT:    br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]]
// CHECK2:       .cancel.exit13:
// CHECK2-NEXT:    br label [[CANCEL_EXIT19]]
// CHECK2:       .cancel.continue14:
// CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK2:       .omp.sections.exit15:
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC16:%.*]]
// CHECK2:       omp.inner.for.inc16:
// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK2-NEXT:    [[INC17:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK2-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND6]]
// CHECK2:       omp.inner.for.end18:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NEXT:    br label [[CANCEL_CONT20:%.*]]
// CHECK2:       cancel.cont20:
// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0
// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT:    [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT:    store i32 [[SUB22]], i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT:    [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]]
// CHECK2-NEXT:    br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2:       omp.precond.then:
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK2-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK2-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
// CHECK2-NEXT:    br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2:       cond.true:
// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK2-NEXT:    br label [[COND_END:%.*]]
// CHECK2:       cond.false:
// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    br label [[COND_END]]
// CHECK2:       cond.end:
// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ]
// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT:    store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND26:%.*]]
// CHECK2:       omp.inner.for.cond26:
// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]]
// CHECK2-NEXT:    br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]]
// CHECK2:       omp.inner.for.body28:
// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[I24]], align 4
// CHECK2-NEXT:    [[TMP35:%.*]] = load float, float* @flag, align 4
// CHECK2-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00
// CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK2:       omp_if.then:
// CHECK2-NEXT:    [[TMP36:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
// CHECK2-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK2-NEXT:    br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]]
// CHECK2:       .cancel.exit29:
// CHECK2-NEXT:    br label [[CANCEL_EXIT34:%.*]]
// CHECK2:       cancel.exit19:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NEXT:    br label [[CANCEL_CONT20]]
// CHECK2:       .cancel.continue30:
// CHECK2-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK2:       omp_if.else:
// CHECK2-NEXT:    br label [[OMP_IF_END]]
// CHECK2:       omp_if.end:
// CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2:       omp.body.continue:
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC31:%.*]]
// CHECK2:       omp.inner.for.inc31:
// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1
// CHECK2-NEXT:    store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND26]]
// CHECK2:       omp.inner.for.end33:
// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2:       omp.loop.exit:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK2:       cancel.exit34:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK2-NEXT:    br label [[CANCEL_CONT35:%.*]]
// CHECK2:       omp.precond.end:
// CHECK2-NEXT:    br label [[CANCEL_CONT35]]
// CHECK2:       cancel.cont35:
// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK2-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK2-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP42:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP39]])
// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
// CHECK2-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK2-NEXT:    ret i32 [[TMP43]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8***, align 8
// CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT:    store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8
// CHECK2-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK2-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK2-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
// CHECK2-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
// CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK2:       omp_if.then:
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1)
// CHECK2-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK2-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK2:       .cancel.exit:
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
// CHECK2-NEXT:    br label [[RETURN:%.*]]
// CHECK2:       .cancel.continue:
// CHECK2-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK2:       omp_if.else:
// CHECK2-NEXT:    br label [[OMP_IF_END]]
// CHECK2:       omp_if.end:
// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK2-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
// CHECK2-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
// CHECK2-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
// CHECK2-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
// CHECK2-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
// CHECK2-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
// CHECK2-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK2-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK2:       .cancel.exit2:
// CHECK2-NEXT:    br label [[RETURN]]
// CHECK2:       .cancel.continue3:
// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK2-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
// CHECK2-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
// CHECK2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
// CHECK2-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
// CHECK2-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
// CHECK2-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
// CHECK2-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
// CHECK2-NEXT:    br label [[RETURN]]
// CHECK2:       return:
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK2-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK2-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK2-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK2-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK2-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK2-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK2-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK2-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK2-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK2-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK2-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK2-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK2-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK2-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK2:       .cancel.exit.i:
// CHECK2-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
// CHECK2:       .cancel.continue.i:
// CHECK2-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT]]
// CHECK2:       .omp_outlined..1.exit:
// CHECK2-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK2-NEXT:    ret i32 0
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
// CHECK2-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2:       omp.inner.for.cond:
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2:       omp.inner.for.body:
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK2-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK2-NEXT:    ]
// CHECK2:       .omp.sections.case:
// CHECK2-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK2-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK2-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK2:       .cancel.exit:
// CHECK2-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK2:       .cancel.continue:
// CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK2:       .omp.sections.exit:
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2:       omp.inner.for.inc:
// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK2:       omp.inner.for.end:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK2:       cancel.cont:
// CHECK2-NEXT:    ret void
// CHECK2:       cancel.exit:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK2-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
// CHECK2-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2:       omp.inner.for.cond:
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2:       omp.inner.for.body:
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK2-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK2-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]]
// CHECK2-NEXT:    ]
// CHECK2:       .omp.sections.case:
// CHECK2-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK2-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK2-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK2:       .cancel.exit:
// CHECK2-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK2:       .cancel.continue:
// CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK2:       .omp.sections.case1:
// CHECK2-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK2-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK2-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK2:       .cancel.exit2:
// CHECK2-NEXT:    br label [[CANCEL_EXIT]]
// CHECK2:       .cancel.continue3:
// CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK2:       .omp.sections.exit:
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2:       omp.inner.for.inc:
// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK2:       omp.inner.for.end:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK2:       cancel.cont:
// CHECK2-NEXT:    ret void
// CHECK2:       cancel.exit:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK2-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK2-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2:       omp.precond.then:
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
// CHECK2-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2:       cond.true:
// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT:    br label [[COND_END:%.*]]
// CHECK2:       cond.false:
// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    br label [[COND_END]]
// CHECK2:       cond.end:
// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT:    store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2:       omp.inner.for.cond:
// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2:       omp.inner.for.body:
// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK2-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK2-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 2)
// CHECK2-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK2-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK2:       .cancel.exit:
// CHECK2-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK2:       .cancel.continue:
// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[I4]], align 4
// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[R3]], align 4
// CHECK2-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
// CHECK2-NEXT:    store i32 [[ADD7]], i32* [[R3]], align 4
// CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2:       omp.body.continue:
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2:       omp.inner.for.inc:
// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK2-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK2:       omp.inner.for.end:
// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2:       omp.loop.exit:
// CHECK2-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK2-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK2-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
// CHECK2-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4
// CHECK2-NEXT:    [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK2-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT:    switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT:    ]
// CHECK2:       .omp.reduction.case1:
// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, i32* [[R3]], align 4
// CHECK2-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
// CHECK2-NEXT:    store i32 [[ADD9]], i32* [[TMP1]], align 4
// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2:       cancel.exit:
// CHECK2-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
// CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK2:       .omp.reduction.case2:
// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
// CHECK2-NEXT:    [[TMP36:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP35]] monotonic, align 4
// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2:       .omp.reduction.default:
// CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK2:       omp.precond.end:
// CHECK2-NEXT:    br label [[CANCEL_CONT]]
// CHECK2:       cancel.cont:
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK2-NEXT:    ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@main
// CHECK3-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK3-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[I36:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK3-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK3-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK3-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK3-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK3:       omp_parallel:
// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
// CHECK3:       omp.par.outlined.exit:
// CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK3:       omp.par.exit.split:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
// CHECK3:       omp_section_loop.preheader:
// CHECK3-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
// CHECK3-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER:%.*]]
// CHECK3:       omp_section_loop.header:
// CHECK3-NEXT:    [[OMP_SECTION_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER]] ], [ [[OMP_SECTION_LOOP_NEXT:%.*]], [[OMP_SECTION_LOOP_INC:%.*]] ]
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_COND:%.*]]
// CHECK3:       omp_section_loop.cond:
// CHECK3-NEXT:    [[OMP_SECTION_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV]], [[TMP3]]
// CHECK3-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP]], label [[OMP_SECTION_LOOP_BODY:%.*]], label [[OMP_SECTION_LOOP_EXIT:%.*]]
// CHECK3:       omp_section_loop.body:
// CHECK3-NEXT:    [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]]
// CHECK3-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], 1
// CHECK3-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 0
// CHECK3-NEXT:    switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [
// CHECK3-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]]
// CHECK3-NEXT:    ]
// CHECK3:       omp_section_loop.inc:
// CHECK3-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
// CHECK3:       omp_section_loop.exit:
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
// CHECK3:       omp_section_loop.after:
// CHECK3-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
// CHECK3:       omp_sections.end:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
// CHECK3:       omp_section_loop.preheader13:
// CHECK3-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
// CHECK3-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
// CHECK3-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
// CHECK3:       omp_section_loop.header14:
// CHECK3-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
// CHECK3:       omp_section_loop.cond15:
// CHECK3-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
// CHECK3-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
// CHECK3:       omp_section_loop.body16:
// CHECK3-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
// CHECK3-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
// CHECK3-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
// CHECK3-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
// CHECK3-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
// CHECK3-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
// CHECK3-NEXT:    ]
// CHECK3:       omp_section_loop.inc17:
// CHECK3-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
// CHECK3:       omp_section_loop.exit18:
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
// CHECK3:       omp_section_loop.after19:
// CHECK3-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
// CHECK3:       omp_sections.end33:
// CHECK3-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
// CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK3-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
// CHECK3-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3:       omp.precond.then:
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK3-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK3-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
// CHECK3-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3:       cond.true:
// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK3-NEXT:    br label [[COND_END:%.*]]
// CHECK3:       cond.false:
// CHECK3-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    br label [[COND_END]]
// CHECK3:       cond.end:
// CHECK3-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
// CHECK3-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT:    store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3:       omp.inner.for.cond:
// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
// CHECK3-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3:       omp.inner.for.body:
// CHECK3-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
// CHECK3-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
// CHECK3-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
// CHECK3-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
// CHECK3-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK3:       omp_if.then:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK3-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
// CHECK3-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK3-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK3:       .cancel.exit:
// CHECK3-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK3:       omp_section_loop.body.case:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
// CHECK3-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
// CHECK3-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
// CHECK3:       omp_section_loop.body.case.split:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK3:       omp_section_loop.body.case.cncl:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK3:       omp_section_loop.body.case23:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
// CHECK3-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
// CHECK3-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
// CHECK3:       omp_section_loop.body.case23.split:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK3:       omp_section_loop.body.case23.cncl:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK3:       omp_section_loop.body.case25:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
// CHECK3-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
// CHECK3-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
// CHECK3:       omp_section_loop.body.case25.split:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK3:       omp_section_loop.body.case25.cncl:
// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK3:       .cancel.continue:
// CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK3:       omp_if.else:
// CHECK3-NEXT:    br label [[OMP_IF_END]]
// CHECK3:       omp_if.end:
// CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3:       omp.body.continue:
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3:       omp.inner.for.inc:
// CHECK3-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
// CHECK3-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK3:       omp.inner.for.end:
// CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3:       omp.loop.exit:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
// CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK3:       cancel.exit:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
// CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK3:       omp.precond.end:
// CHECK3-NEXT:    br label [[CANCEL_CONT]]
// CHECK3:       cancel.cont:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
// CHECK3-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
// CHECK3-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK3-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK3-NEXT:    [[TMP40:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK3-NEXT:    ret i32 [[TMP40]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@main..omp_par
// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT:  omp.par.entry:
// CHECK3-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
// CHECK3-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
// CHECK3-NEXT:    br label [[OMP_PAR_REGION:%.*]]
// CHECK3:       omp.par.outlined.exit.exitStub:
// CHECK3-NEXT:    ret void
// CHECK3:       omp.par.region:
// CHECK3-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
// CHECK3-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
// CHECK3:       2:
// CHECK3-NEXT:    br label [[TMP3:%.*]]
// CHECK3:       3:
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK3-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
// CHECK3-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
// CHECK3-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
// CHECK3-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK3-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK3-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
// CHECK3:       .cncl5:
// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
// CHECK3:       .cont:
// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK3-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
// CHECK3-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
// CHECK3-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
// CHECK3-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// CHECK3-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
// CHECK3-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
// CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
// CHECK3:       omp.par.pre_finalize:
// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK3:       13:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
// CHECK3-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
// CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
// CHECK3:       .cncl:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK3:       .split:
// CHECK3-NEXT:    br label [[TMP3]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK3-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK3-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK3-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK3-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK3-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK3-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK3-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK3-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK3-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK3-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK3-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK3-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]]
// CHECK3-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK3-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK3:       .cancel.exit.i:
// CHECK3-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__EXIT:%.*]]
// CHECK3:       .cancel.continue.i:
// CHECK3-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK3:       .omp_outlined..exit:
// CHECK3-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK3-NEXT:    ret i32 0
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB17:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
// CHECK3-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3:       omp.inner.for.cond:
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK3-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3:       omp.inner.for.body:
// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK3-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK3-NEXT:    ]
// CHECK3:       .omp.sections.case:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK3-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK3-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK3:       .omp.sections.case.split:
// CHECK3-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK3:       .omp.sections.case.cncl:
// CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK3:       .omp.sections.exit:
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3:       omp.inner.for.inc:
// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK3-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK3:       omp.inner.for.end:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
// CHECK3-NEXT:    br label [[CANCEL_CONT]]
// CHECK3:       cancel.cont:
// CHECK3-NEXT:    ret void
// CHECK3:       cancel.exit:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK3-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1
// CHECK3-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3:       omp.inner.for.cond:
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK3-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK3-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3:       omp.inner.for.body:
// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK3-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK3-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]]
// CHECK3-NEXT:    ]
// CHECK3:       .omp.sections.case:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK3-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK3-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK3:       .omp.sections.case.split:
// CHECK3-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK3:       .omp.sections.case.cncl:
// CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK3:       .omp.sections.case2:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3)
// CHECK3-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
// CHECK3-NEXT:    br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]]
// CHECK3:       .omp.sections.case2.split:
// CHECK3-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK3:       .omp.sections.case2.cncl:
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_END]]
// CHECK3:       .omp.sections.exit:
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3:       omp.inner.for.inc:
// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK3:       omp.inner.for.end:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]])
// CHECK3-NEXT:    br label [[CANCEL_CONT]]
// CHECK3:       cancel.cont:
// CHECK3-NEXT:    ret void
// CHECK3:       cancel.exit:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK3-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR5]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK3-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK3-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK3-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK3-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3:       omp.precond.then:
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3:       cond.true:
// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT:    br label [[COND_END:%.*]]
// CHECK3:       cond.false:
// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    br label [[COND_END]]
// CHECK3:       cond.end:
// CHECK3-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
// CHECK3-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT:    store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3:       omp.inner.for.cond:
// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK3-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3:       omp.inner.for.body:
// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
// CHECK3-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2)
// CHECK3-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
// CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK3:       .cancel.exit:
// CHECK3-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK3:       .cancel.continue:
// CHECK3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[I4]], align 4
// CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[R3]], align 4
// CHECK3-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
// CHECK3-NEXT:    store i32 [[ADD8]], i32* [[R3]], align 4
// CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3:       omp.body.continue:
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3:       omp.inner.for.inc:
// CHECK3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK3-NEXT:    store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK3:       omp.inner.for.end:
// CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3:       omp.loop.exit:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK3-NEXT:    [[TMP20:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK3-NEXT:    store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK3-NEXT:    [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT:    switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT:    ]
// CHECK3:       .omp.reduction.case1:
// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK3-NEXT:    [[TMP24:%.*]] = load i32, i32* [[R3]], align 4
// CHECK3-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
// CHECK3-NEXT:    store i32 [[ADD13]], i32* [[TMP1]], align 4
// CHECK3-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3:       cancel.exit:
// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
// CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK3:       .omp.reduction.case2:
// CHECK3-NEXT:    [[TMP25:%.*]] = load i32, i32* [[R3]], align 4
// CHECK3-NEXT:    [[TMP26:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP25]] monotonic, align 4
// CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3:       .omp.reduction.default:
// CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK3:       omp.precond.end:
// CHECK3-NEXT:    br label [[CANCEL_CONT]]
// CHECK3:       cancel.cont:
// CHECK3-NEXT:    ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK3-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK3-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK3-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK3-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK3-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK3-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK3-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@main
// CHECK4-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK4-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I36:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK4-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK4-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK4-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK4-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK4:       omp_parallel:
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
// CHECK4:       omp.par.outlined.exit:
// CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK4:       omp.par.exit.split:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
// CHECK4:       omp_section_loop.preheader:
// CHECK4-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
// CHECK4-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER:%.*]]
// CHECK4:       omp_section_loop.header:
// CHECK4-NEXT:    [[OMP_SECTION_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER]] ], [ [[OMP_SECTION_LOOP_NEXT:%.*]], [[OMP_SECTION_LOOP_INC:%.*]] ]
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_COND:%.*]]
// CHECK4:       omp_section_loop.cond:
// CHECK4-NEXT:    [[OMP_SECTION_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV]], [[TMP3]]
// CHECK4-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP]], label [[OMP_SECTION_LOOP_BODY:%.*]], label [[OMP_SECTION_LOOP_EXIT:%.*]]
// CHECK4:       omp_section_loop.body:
// CHECK4-NEXT:    [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]]
// CHECK4-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], 1
// CHECK4-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 0
// CHECK4-NEXT:    switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [
// CHECK4-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       omp_section_loop.inc:
// CHECK4-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
// CHECK4:       omp_section_loop.exit:
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
// CHECK4:       omp_section_loop.after:
// CHECK4-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
// CHECK4:       omp_sections.end:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
// CHECK4:       omp_section_loop.preheader13:
// CHECK4-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
// CHECK4-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
// CHECK4-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
// CHECK4:       omp_section_loop.header14:
// CHECK4-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
// CHECK4:       omp_section_loop.cond15:
// CHECK4-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
// CHECK4-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
// CHECK4:       omp_section_loop.body16:
// CHECK4-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
// CHECK4-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
// CHECK4-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
// CHECK4-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
// CHECK4-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
// CHECK4-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       omp_section_loop.inc17:
// CHECK4-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
// CHECK4:       omp_section_loop.exit18:
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
// CHECK4:       omp_section_loop.after19:
// CHECK4-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
// CHECK4:       omp_sections.end33:
// CHECK4-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK4-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
// CHECK4-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK4-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK4-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK4-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
// CHECK4-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK4:       omp.precond.then:
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK4-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK4-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
// CHECK4-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4:       cond.true:
// CHECK4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK4-NEXT:    br label [[COND_END:%.*]]
// CHECK4:       cond.false:
// CHECK4-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    br label [[COND_END]]
// CHECK4:       cond.end:
// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
// CHECK4-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
// CHECK4-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
// CHECK4-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
// CHECK4-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
// CHECK4-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
// CHECK4-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK4:       omp_if.then:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK4-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
// CHECK4-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK4-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK4:       .cancel.exit:
// CHECK4-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK4:       omp_section_loop.body.case:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
// CHECK4-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
// CHECK4-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
// CHECK4:       omp_section_loop.body.case.split:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK4:       omp_section_loop.body.case.cncl:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK4:       omp_section_loop.body.case23:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
// CHECK4-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
// CHECK4-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
// CHECK4:       omp_section_loop.body.case23.split:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK4:       omp_section_loop.body.case23.cncl:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK4:       omp_section_loop.body.case25:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
// CHECK4-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
// CHECK4-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
// CHECK4:       omp_section_loop.body.case25.split:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK4:       omp_section_loop.body.case25.cncl:
// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK4:       .cancel.continue:
// CHECK4-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK4:       omp_if.else:
// CHECK4-NEXT:    br label [[OMP_IF_END]]
// CHECK4:       omp_if.end:
// CHECK4-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4:       omp.body.continue:
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
// CHECK4-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4:       omp.loop.exit:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
// CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK4:       cancel.exit:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
// CHECK4-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK4:       omp.precond.end:
// CHECK4-NEXT:    br label [[CANCEL_CONT]]
// CHECK4:       cancel.cont:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
// CHECK4-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
// CHECK4-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK4-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK4-NEXT:    [[TMP40:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK4-NEXT:    ret i32 [[TMP40]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@main..omp_par
// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK4-NEXT:  omp.par.entry:
// CHECK4-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
// CHECK4-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
// CHECK4-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
// CHECK4-NEXT:    br label [[OMP_PAR_REGION:%.*]]
// CHECK4:       omp.par.outlined.exit.exitStub:
// CHECK4-NEXT:    ret void
// CHECK4:       omp.par.region:
// CHECK4-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
// CHECK4-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
// CHECK4-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
// CHECK4:       2:
// CHECK4-NEXT:    br label [[TMP3:%.*]]
// CHECK4:       3:
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK4-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
// CHECK4-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
// CHECK4-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK4-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
// CHECK4-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK4-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK4-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
// CHECK4:       .cncl5:
// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
// CHECK4:       .cont:
// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK4-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK4-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
// CHECK4-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
// CHECK4-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
// CHECK4-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
// CHECK4-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// CHECK4-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
// CHECK4-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
// CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
// CHECK4:       omp.par.pre_finalize:
// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK4:       13:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
// CHECK4-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
// CHECK4-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
// CHECK4:       .cncl:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK4:       .split:
// CHECK4-NEXT:    br label [[TMP3]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK4-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK4-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK4-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK4-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK4-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK4-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK4-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK4-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK4-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK4-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK4-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK4-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK4-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK4-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK4-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK4-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK4-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]]
// CHECK4-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]]
// CHECK4-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK4-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK4:       .cancel.exit.i:
// CHECK4-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__EXIT:%.*]]
// CHECK4:       .cancel.continue.i:
// CHECK4-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK4:       .omp_outlined..exit:
// CHECK4-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK4-NEXT:    ret i32 0
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB17:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
// CHECK4-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
// CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK4-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK4-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       .omp.sections.case:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK4-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK4-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK4:       .omp.sections.case.split:
// CHECK4-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK4:       .omp.sections.case.cncl:
// CHECK4-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK4:       .omp.sections.exit:
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK4-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
// CHECK4-NEXT:    br label [[CANCEL_CONT]]
// CHECK4:       cancel.cont:
// CHECK4-NEXT:    ret void
// CHECK4:       cancel.exit:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK4-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1
// CHECK4-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1
// CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK4-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK4-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK4-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       .omp.sections.case:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK4-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK4-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK4:       .omp.sections.case.split:
// CHECK4-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK4:       .omp.sections.case.cncl:
// CHECK4-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK4:       .omp.sections.case2:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3)
// CHECK4-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
// CHECK4-NEXT:    br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]]
// CHECK4:       .omp.sections.case2.split:
// CHECK4-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK4:       .omp.sections.case2.cncl:
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_END]]
// CHECK4:       .omp.sections.exit:
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK4-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]])
// CHECK4-NEXT:    br label [[CANCEL_CONT]]
// CHECK4:       cancel.cont:
// CHECK4-NEXT:    ret void
// CHECK4:       cancel.exit:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK4-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR5]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK4-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK4-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK4-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK4-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK4-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK4-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK4:       omp.precond.then:
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK4-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK4-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
// CHECK4-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4:       cond.true:
// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK4-NEXT:    br label [[COND_END:%.*]]
// CHECK4:       cond.false:
// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    br label [[COND_END]]
// CHECK4:       cond.end:
// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
// CHECK4-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK4-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
// CHECK4-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2)
// CHECK4-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
// CHECK4-NEXT:    br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK4:       .cancel.exit:
// CHECK4-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK4:       .cancel.continue:
// CHECK4-NEXT:    [[TMP16:%.*]] = load i32, i32* [[I4]], align 4
// CHECK4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[R3]], align 4
// CHECK4-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
// CHECK4-NEXT:    store i32 [[ADD8]], i32* [[R3]], align 4
// CHECK4-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4:       omp.body.continue:
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK4-NEXT:    store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4:       omp.loop.exit:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK4-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK4-NEXT:    [[TMP20:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK4-NEXT:    store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK4-NEXT:    [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK4-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK4-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       .omp.reduction.case1:
// CHECK4-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK4-NEXT:    [[TMP24:%.*]] = load i32, i32* [[R3]], align 4
// CHECK4-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
// CHECK4-NEXT:    store i32 [[ADD13]], i32* [[TMP1]], align 4
// CHECK4-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       cancel.exit:
// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
// CHECK4-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK4:       .omp.reduction.case2:
// CHECK4-NEXT:    [[TMP25:%.*]] = load i32, i32* [[R3]], align 4
// CHECK4-NEXT:    [[TMP26:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP25]] monotonic, align 4
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.default:
// CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK4:       omp.precond.end:
// CHECK4-NEXT:    br label [[CANCEL_CONT]]
// CHECK4:       cancel.cont:
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK4-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK4-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK4-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK4-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK4-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK4-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK4-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK4-NEXT:    ret void
//
//
// CHECK7-LABEL: define {{[^@]+}}@main
// CHECK7-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK7-NEXT:  entry:
// CHECK7-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_LB_1:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_UB_2:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_ST_3:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IL_4:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IV_5:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[I24:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK7-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK7-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK7-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK7-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]])
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK7-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
// CHECK7-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK7-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7:       omp.inner.for.cond:
// CHECK7-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK7-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7:       omp.inner.for.body:
// CHECK7-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK7-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK7-NEXT:    ]
// CHECK7:       .omp.sections.case:
// CHECK7-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK7-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK7-NEXT:    br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK7:       .cancel.exit:
// CHECK7-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK7:       .cancel.continue:
// CHECK7-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK7:       .omp.sections.exit:
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7:       omp.inner.for.inc:
// CHECK7-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK7-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK7:       omp.inner.for.end:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK7:       cancel.cont:
// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
// CHECK7-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK7-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
// CHECK7-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
// CHECK7-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK7-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK7-NEXT:    store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND6:%.*]]
// CHECK7:       omp.inner.for.cond6:
// CHECK7-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK7-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK7-NEXT:    [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
// CHECK7-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
// CHECK7:       omp.inner.for.body8:
// CHECK7-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK7-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [
// CHECK7-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]]
// CHECK7-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]]
// CHECK7-NEXT:    ]
// CHECK7:       .omp.sections.case9:
// CHECK7-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK7-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK7-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]]
// CHECK7:       .cancel.exit10:
// CHECK7-NEXT:    br label [[CANCEL_EXIT19:%.*]]
// CHECK7:       cancel.exit:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK7-NEXT:    br label [[CANCEL_CONT]]
// CHECK7:       .cancel.continue11:
// CHECK7-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK7:       .omp.sections.case12:
// CHECK7-NEXT:    [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK7-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK7-NEXT:    br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]]
// CHECK7:       .cancel.exit13:
// CHECK7-NEXT:    br label [[CANCEL_EXIT19]]
// CHECK7:       .cancel.continue14:
// CHECK7-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK7:       .omp.sections.exit15:
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC16:%.*]]
// CHECK7:       omp.inner.for.inc16:
// CHECK7-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK7-NEXT:    [[INC17:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK7-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND6]]
// CHECK7:       omp.inner.for.end18:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK7-NEXT:    br label [[CANCEL_CONT20:%.*]]
// CHECK7:       cancel.cont20:
// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
// CHECK7-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK7-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK7-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK7-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0
// CHECK7-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK7-NEXT:    [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK7-NEXT:    store i32 [[SUB22]], i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK7-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK7-NEXT:    [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]]
// CHECK7-NEXT:    br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK7:       omp.precond.then:
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK7-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK7-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK7-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK7-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
// CHECK7-NEXT:    br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK7:       cond.true:
// CHECK7-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK7-NEXT:    br label [[COND_END:%.*]]
// CHECK7:       cond.false:
// CHECK7-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    br label [[COND_END]]
// CHECK7:       cond.end:
// CHECK7-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ]
// CHECK7-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK7-NEXT:    store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND26:%.*]]
// CHECK7:       omp.inner.for.cond26:
// CHECK7-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]]
// CHECK7-NEXT:    br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]]
// CHECK7:       omp.inner.for.body28:
// CHECK7-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1
// CHECK7-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK7-NEXT:    store i32 [[ADD]], i32* [[I24]], align 4
// CHECK7-NEXT:    [[TMP35:%.*]] = load float, float* @flag, align 4
// CHECK7-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00
// CHECK7-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK7:       omp_if.then:
// CHECK7-NEXT:    [[TMP36:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
// CHECK7-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK7-NEXT:    br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]]
// CHECK7:       .cancel.exit29:
// CHECK7-NEXT:    br label [[CANCEL_EXIT34:%.*]]
// CHECK7:       cancel.exit19:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK7-NEXT:    br label [[CANCEL_CONT20]]
// CHECK7:       .cancel.continue30:
// CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK7:       omp_if.else:
// CHECK7-NEXT:    br label [[OMP_IF_END]]
// CHECK7:       omp_if.end:
// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7:       omp.body.continue:
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC31:%.*]]
// CHECK7:       omp.inner.for.inc31:
// CHECK7-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1
// CHECK7-NEXT:    store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND26]]
// CHECK7:       omp.inner.for.end33:
// CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK7:       omp.loop.exit:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK7-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK7:       cancel.exit34:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK7-NEXT:    br label [[CANCEL_CONT35:%.*]]
// CHECK7:       omp.precond.end:
// CHECK7-NEXT:    br label [[CANCEL_CONT35]]
// CHECK7:       cancel.cont35:
// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK7-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK7-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
// CHECK7-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
// CHECK7-NEXT:    [[TMP42:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP39]])
// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
// CHECK7-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK7-NEXT:    [[TMP43:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK7-NEXT:    ret i32 [[TMP43]]
//
//
// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK7-NEXT:  entry:
// CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8***, align 8
// CHECK7-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK7-NEXT:    store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8
// CHECK7-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK7-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8
// CHECK7-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK7-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
// CHECK7-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
// CHECK7-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK7:       omp_if.then:
// CHECK7-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK7-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1)
// CHECK7-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK7-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK7:       .cancel.exit:
// CHECK7-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK7-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
// CHECK7-NEXT:    br label [[RETURN:%.*]]
// CHECK7:       .cancel.continue:
// CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK7:       omp_if.else:
// CHECK7-NEXT:    br label [[OMP_IF_END]]
// CHECK7:       omp_if.end:
// CHECK7-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK7-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
// CHECK7-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
// CHECK7-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK7-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
// CHECK7-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
// CHECK7-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
// CHECK7-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
// CHECK7-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK7-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK7:       .cancel.exit2:
// CHECK7-NEXT:    br label [[RETURN]]
// CHECK7:       .cancel.continue3:
// CHECK7-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK7-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK7-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
// CHECK7-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
// CHECK7-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
// CHECK7-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
// CHECK7-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
// CHECK7-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
// CHECK7-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
// CHECK7-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
// CHECK7-NEXT:    br label [[RETURN]]
// CHECK7:       return:
// CHECK7-NEXT:    ret void
//
//
// CHECK7-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK7-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK7-NEXT:  entry:
// CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK7-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK7-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK7-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK7-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK7-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK7-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK7-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK7-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK7-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK7-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK7-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK7-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK7-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK7-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK7-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK7-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK7-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK7-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK7-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK7-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK7-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK7-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK7-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK7-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK7-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK7-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK7-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]]
// CHECK7-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK7-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK7:       .cancel.exit.i:
// CHECK7-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK7-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
// CHECK7:       .cancel.continue.i:
// CHECK7-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK7-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT]]
// CHECK7:       .omp_outlined..1.exit:
// CHECK7-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK7-NEXT:    ret i32 0
//
//
// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK7-NEXT:  entry:
// CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK7-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK7-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
// CHECK7-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
// CHECK7-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK7-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7:       omp.inner.for.cond:
// CHECK7-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK7-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7:       omp.inner.for.body:
// CHECK7-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK7-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK7-NEXT:    ]
// CHECK7:       .omp.sections.case:
// CHECK7-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK7-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK7-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK7:       .cancel.exit:
// CHECK7-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK7:       .cancel.continue:
// CHECK7-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK7:       .omp.sections.exit:
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7:       omp.inner.for.inc:
// CHECK7-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK7-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK7:       omp.inner.for.end:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK7:       cancel.cont:
// CHECK7-NEXT:    ret void
// CHECK7:       cancel.exit:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK7-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK7-NEXT:  entry:
// CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK7-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK7-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
// CHECK7-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
// CHECK7-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK7-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7:       omp.inner.for.cond:
// CHECK7-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK7-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK7-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7:       omp.inner.for.body:
// CHECK7-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK7-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK7-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]]
// CHECK7-NEXT:    ]
// CHECK7:       .omp.sections.case:
// CHECK7-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK7-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK7-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK7:       .cancel.exit:
// CHECK7-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK7:       .cancel.continue:
// CHECK7-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK7:       .omp.sections.case1:
// CHECK7-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK7-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK7-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK7:       .cancel.exit2:
// CHECK7-NEXT:    br label [[CANCEL_EXIT]]
// CHECK7:       .cancel.continue3:
// CHECK7-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK7:       .omp.sections.exit:
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7:       omp.inner.for.inc:
// CHECK7-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK7-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK7:       omp.inner.for.end:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK7:       cancel.cont:
// CHECK7-NEXT:    ret void
// CHECK7:       cancel.exit:
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK7-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR1]] {
// CHECK7-NEXT:  entry:
// CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK7-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK7-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK7-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK7-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK7-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK7-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK7-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK7-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK7-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK7-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK7-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK7-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK7-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK7-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK7-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK7-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK7-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK7:       omp.precond.then:
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK7-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK7-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK7-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK7-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK7-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK7-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
// CHECK7-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK7:       cond.true:
// CHECK7-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK7-NEXT:    br label [[COND_END:%.*]]
// CHECK7:       cond.false:
// CHECK7-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    br label [[COND_END]]
// CHECK7:       cond.end:
// CHECK7-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
// CHECK7-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK7-NEXT:    store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK7:       omp.inner.for.cond:
// CHECK7-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK7-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK7-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK7:       omp.inner.for.body:
// CHECK7-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK7-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK7-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK7-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK7-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 2)
// CHECK7-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK7-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK7:       .cancel.exit:
// CHECK7-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK7:       .cancel.continue:
// CHECK7-NEXT:    [[TMP20:%.*]] = load i32, i32* [[I4]], align 4
// CHECK7-NEXT:    [[TMP21:%.*]] = load i32, i32* [[R3]], align 4
// CHECK7-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
// CHECK7-NEXT:    store i32 [[ADD7]], i32* [[R3]], align 4
// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK7:       omp.body.continue:
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK7:       omp.inner.for.inc:
// CHECK7-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK7-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK7:       omp.inner.for.end:
// CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK7:       omp.loop.exit:
// CHECK7-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
// CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK7-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK7-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
// CHECK7-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4
// CHECK7-NEXT:    [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK7-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT:    switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK7-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK7-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK7-NEXT:    ]
// CHECK7:       .omp.reduction.case1:
// CHECK7-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK7-NEXT:    [[TMP32:%.*]] = load i32, i32* [[R3]], align 4
// CHECK7-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
// CHECK7-NEXT:    store i32 [[ADD9]], i32* [[TMP1]], align 4
// CHECK7-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK7-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7:       cancel.exit:
// CHECK7-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK7-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
// CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK7:       .omp.reduction.case2:
// CHECK7-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
// CHECK7-NEXT:    [[TMP36:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP35]] monotonic, align 4
// CHECK7-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK7:       .omp.reduction.default:
// CHECK7-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK7:       omp.precond.end:
// CHECK7-NEXT:    br label [[CANCEL_CONT]]
// CHECK7:       cancel.cont:
// CHECK7-NEXT:    ret void
//
//
// CHECK7-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK7-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK7-NEXT:  entry:
// CHECK7-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK7-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK7-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK7-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK7-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK7-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK7-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK7-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK7-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK7-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK7-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK7-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK7-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK7-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK7-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK7-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK7-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK7-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK7-NEXT:    ret void
//
//
// CHECK8-LABEL: define {{[^@]+}}@main
// CHECK8-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK8-NEXT:  entry:
// CHECK8-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_LB_1:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_UB_2:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_ST_3:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IL_4:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IV_5:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[I24:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK8-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK8-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK8-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK8-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]])
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK8-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
// CHECK8-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK8-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8:       omp.inner.for.cond:
// CHECK8-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK8-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8:       omp.inner.for.body:
// CHECK8-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK8-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK8-NEXT:    ]
// CHECK8:       .omp.sections.case:
// CHECK8-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK8-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK8-NEXT:    br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK8:       .cancel.exit:
// CHECK8-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK8:       .cancel.continue:
// CHECK8-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK8:       .omp.sections.exit:
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8:       omp.inner.for.inc:
// CHECK8-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK8-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK8:       omp.inner.for.end:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK8:       cancel.cont:
// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
// CHECK8-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK8-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
// CHECK8-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
// CHECK8-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK8-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
// CHECK8-NEXT:    store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND6:%.*]]
// CHECK8:       omp.inner.for.cond6:
// CHECK8-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK8-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
// CHECK8-NEXT:    [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
// CHECK8-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
// CHECK8:       omp.inner.for.body8:
// CHECK8-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK8-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [
// CHECK8-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]]
// CHECK8-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]]
// CHECK8-NEXT:    ]
// CHECK8:       .omp.sections.case9:
// CHECK8-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK8-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK8-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]]
// CHECK8:       .cancel.exit10:
// CHECK8-NEXT:    br label [[CANCEL_EXIT19:%.*]]
// CHECK8:       cancel.exit:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK8-NEXT:    br label [[CANCEL_CONT]]
// CHECK8:       .cancel.continue11:
// CHECK8-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK8:       .omp.sections.case12:
// CHECK8-NEXT:    [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
// CHECK8-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK8-NEXT:    br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]]
// CHECK8:       .cancel.exit13:
// CHECK8-NEXT:    br label [[CANCEL_EXIT19]]
// CHECK8:       .cancel.continue14:
// CHECK8-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
// CHECK8:       .omp.sections.exit15:
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC16:%.*]]
// CHECK8:       omp.inner.for.inc16:
// CHECK8-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK8-NEXT:    [[INC17:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK8-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND6]]
// CHECK8:       omp.inner.for.end18:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK8-NEXT:    br label [[CANCEL_CONT20:%.*]]
// CHECK8:       cancel.cont20:
// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
// CHECK8-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK8-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK8-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK8-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0
// CHECK8-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK8-NEXT:    [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK8-NEXT:    store i32 [[SUB22]], i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK8-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK8-NEXT:    [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]]
// CHECK8-NEXT:    br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK8:       omp.precond.then:
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK8-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK8-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK8-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK8-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
// CHECK8-NEXT:    br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK8:       cond.true:
// CHECK8-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
// CHECK8-NEXT:    br label [[COND_END:%.*]]
// CHECK8:       cond.false:
// CHECK8-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    br label [[COND_END]]
// CHECK8:       cond.end:
// CHECK8-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ]
// CHECK8-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK8-NEXT:    store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND26:%.*]]
// CHECK8:       omp.inner.for.cond26:
// CHECK8-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]]
// CHECK8-NEXT:    br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]]
// CHECK8:       omp.inner.for.body28:
// CHECK8-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1
// CHECK8-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK8-NEXT:    store i32 [[ADD]], i32* [[I24]], align 4
// CHECK8-NEXT:    [[TMP35:%.*]] = load float, float* @flag, align 4
// CHECK8-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00
// CHECK8-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK8:       omp_if.then:
// CHECK8-NEXT:    [[TMP36:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
// CHECK8-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK8-NEXT:    br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]]
// CHECK8:       .cancel.exit29:
// CHECK8-NEXT:    br label [[CANCEL_EXIT34:%.*]]
// CHECK8:       cancel.exit19:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK8-NEXT:    br label [[CANCEL_CONT20]]
// CHECK8:       .cancel.continue30:
// CHECK8-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK8:       omp_if.else:
// CHECK8-NEXT:    br label [[OMP_IF_END]]
// CHECK8:       omp_if.end:
// CHECK8-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8:       omp.body.continue:
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC31:%.*]]
// CHECK8:       omp.inner.for.inc31:
// CHECK8-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1
// CHECK8-NEXT:    store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND26]]
// CHECK8:       omp.inner.for.end33:
// CHECK8-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK8:       omp.loop.exit:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK8-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK8:       cancel.exit34:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
// CHECK8-NEXT:    br label [[CANCEL_CONT35:%.*]]
// CHECK8:       omp.precond.end:
// CHECK8-NEXT:    br label [[CANCEL_CONT35]]
// CHECK8:       cancel.cont35:
// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK8-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK8-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
// CHECK8-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
// CHECK8-NEXT:    [[TMP42:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP39]])
// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
// CHECK8-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK8-NEXT:    [[TMP43:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK8-NEXT:    ret i32 [[TMP43]]
//
//
// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK8-NEXT:  entry:
// CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8***, align 8
// CHECK8-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK8-NEXT:    store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8
// CHECK8-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK8-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8
// CHECK8-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK8-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
// CHECK8-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
// CHECK8-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK8:       omp_if.then:
// CHECK8-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK8-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1)
// CHECK8-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
// CHECK8-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK8:       .cancel.exit:
// CHECK8-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
// CHECK8-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
// CHECK8-NEXT:    br label [[RETURN:%.*]]
// CHECK8:       .cancel.continue:
// CHECK8-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK8:       omp_if.else:
// CHECK8-NEXT:    br label [[OMP_IF_END]]
// CHECK8:       omp_if.end:
// CHECK8-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK8-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
// CHECK8-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
// CHECK8-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK8-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
// CHECK8-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
// CHECK8-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
// CHECK8-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
// CHECK8-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK8-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK8:       .cancel.exit2:
// CHECK8-NEXT:    br label [[RETURN]]
// CHECK8:       .cancel.continue3:
// CHECK8-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK8-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
// CHECK8-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
// CHECK8-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
// CHECK8-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
// CHECK8-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
// CHECK8-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
// CHECK8-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
// CHECK8-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
// CHECK8-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
// CHECK8-NEXT:    br label [[RETURN]]
// CHECK8:       return:
// CHECK8-NEXT:    ret void
//
//
// CHECK8-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK8-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK8-NEXT:  entry:
// CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK8-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK8-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK8-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK8-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK8-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK8-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK8-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK8-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK8-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK8-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK8-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK8-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK8-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK8-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK8-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK8-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK8-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK8-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK8-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK8-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK8-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK8-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK8-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK8-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK8-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK8-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK8-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]]
// CHECK8-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK8-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK8:       .cancel.exit.i:
// CHECK8-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK8-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
// CHECK8:       .cancel.continue.i:
// CHECK8-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK8-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT]]
// CHECK8:       .omp_outlined..1.exit:
// CHECK8-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK8-NEXT:    ret i32 0
//
//
// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK8-NEXT:  entry:
// CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK8-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK8-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
// CHECK8-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
// CHECK8-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK8-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8:       omp.inner.for.cond:
// CHECK8-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK8-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8:       omp.inner.for.body:
// CHECK8-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK8-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK8-NEXT:    ]
// CHECK8:       .omp.sections.case:
// CHECK8-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK8-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK8-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK8:       .cancel.exit:
// CHECK8-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK8:       .cancel.continue:
// CHECK8-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK8:       .omp.sections.exit:
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8:       omp.inner.for.inc:
// CHECK8-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK8-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK8:       omp.inner.for.end:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK8:       cancel.cont:
// CHECK8-NEXT:    ret void
// CHECK8:       cancel.exit:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK8-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK8-NEXT:  entry:
// CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK8-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK8-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
// CHECK8-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
// CHECK8-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK8-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8:       omp.inner.for.cond:
// CHECK8-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK8-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK8-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8:       omp.inner.for.body:
// CHECK8-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK8-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK8-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]]
// CHECK8-NEXT:    ]
// CHECK8:       .omp.sections.case:
// CHECK8-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK8-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK8-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK8:       .cancel.exit:
// CHECK8-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK8:       .cancel.continue:
// CHECK8-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK8:       .omp.sections.case1:
// CHECK8-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
// CHECK8-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK8-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
// CHECK8:       .cancel.exit2:
// CHECK8-NEXT:    br label [[CANCEL_EXIT]]
// CHECK8:       .cancel.continue3:
// CHECK8-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK8:       .omp.sections.exit:
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8:       omp.inner.for.inc:
// CHECK8-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK8-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK8:       omp.inner.for.end:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK8:       cancel.cont:
// CHECK8-NEXT:    ret void
// CHECK8:       cancel.exit:
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
// CHECK8-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR1]] {
// CHECK8-NEXT:  entry:
// CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK8-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK8-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK8-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK8-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK8-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK8-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK8-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK8-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK8-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK8-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK8-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK8-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK8-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK8-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK8-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK8-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK8-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK8:       omp.precond.then:
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK8-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK8-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK8-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK8-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK8-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK8-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
// CHECK8-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK8:       cond.true:
// CHECK8-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK8-NEXT:    br label [[COND_END:%.*]]
// CHECK8:       cond.false:
// CHECK8-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    br label [[COND_END]]
// CHECK8:       cond.end:
// CHECK8-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
// CHECK8-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK8-NEXT:    store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK8:       omp.inner.for.cond:
// CHECK8-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK8-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK8-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK8:       omp.inner.for.body:
// CHECK8-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK8-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK8-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK8-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
// CHECK8-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 2)
// CHECK8-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK8-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK8:       .cancel.exit:
// CHECK8-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK8:       .cancel.continue:
// CHECK8-NEXT:    [[TMP20:%.*]] = load i32, i32* [[I4]], align 4
// CHECK8-NEXT:    [[TMP21:%.*]] = load i32, i32* [[R3]], align 4
// CHECK8-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
// CHECK8-NEXT:    store i32 [[ADD7]], i32* [[R3]], align 4
// CHECK8-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK8:       omp.body.continue:
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK8:       omp.inner.for.inc:
// CHECK8-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1
// CHECK8-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK8:       omp.inner.for.end:
// CHECK8-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK8:       omp.loop.exit:
// CHECK8-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
// CHECK8-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK8-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK8-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
// CHECK8-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4
// CHECK8-NEXT:    [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK8-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK8-NEXT:    switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK8-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK8-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK8-NEXT:    ]
// CHECK8:       .omp.reduction.case1:
// CHECK8-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK8-NEXT:    [[TMP32:%.*]] = load i32, i32* [[R3]], align 4
// CHECK8-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
// CHECK8-NEXT:    store i32 [[ADD9]], i32* [[TMP1]], align 4
// CHECK8-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK8-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK8:       cancel.exit:
// CHECK8-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK8-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
// CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK8:       .omp.reduction.case2:
// CHECK8-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
// CHECK8-NEXT:    [[TMP36:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP35]] monotonic, align 4
// CHECK8-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK8:       .omp.reduction.default:
// CHECK8-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK8:       omp.precond.end:
// CHECK8-NEXT:    br label [[CANCEL_CONT]]
// CHECK8:       cancel.cont:
// CHECK8-NEXT:    ret void
//
//
// CHECK8-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK8-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK8-NEXT:  entry:
// CHECK8-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK8-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK8-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK8-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK8-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK8-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK8-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK8-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK8-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK8-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK8-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK8-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK8-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK8-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK8-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK8-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK8-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK8-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK8-NEXT:    ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@main
// CHECK9-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK9-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[I36:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK9-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK9-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK9-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK9-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK9:       omp_parallel:
// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
// CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
// CHECK9:       omp.par.outlined.exit:
// CHECK9-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK9:       omp.par.exit.split:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
// CHECK9:       omp_section_loop.preheader:
// CHECK9-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
// CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
// CHECK9-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
// CHECK9-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
// CHECK9-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER:%.*]]
// CHECK9:       omp_section_loop.header:
// CHECK9-NEXT:    [[OMP_SECTION_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER]] ], [ [[OMP_SECTION_LOOP_NEXT:%.*]], [[OMP_SECTION_LOOP_INC:%.*]] ]
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_COND:%.*]]
// CHECK9:       omp_section_loop.cond:
// CHECK9-NEXT:    [[OMP_SECTION_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV]], [[TMP3]]
// CHECK9-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP]], label [[OMP_SECTION_LOOP_BODY:%.*]], label [[OMP_SECTION_LOOP_EXIT:%.*]]
// CHECK9:       omp_section_loop.body:
// CHECK9-NEXT:    [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]]
// CHECK9-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], 1
// CHECK9-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 0
// CHECK9-NEXT:    switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [
// CHECK9-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]]
// CHECK9-NEXT:    ]
// CHECK9:       omp_section_loop.inc:
// CHECK9-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
// CHECK9:       omp_section_loop.exit:
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
// CHECK9:       omp_section_loop.after:
// CHECK9-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
// CHECK9:       omp_sections.end:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
// CHECK9:       omp_section_loop.preheader13:
// CHECK9-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
// CHECK9-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
// CHECK9-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
// CHECK9-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
// CHECK9-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
// CHECK9:       omp_section_loop.header14:
// CHECK9-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
// CHECK9:       omp_section_loop.cond15:
// CHECK9-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
// CHECK9-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
// CHECK9:       omp_section_loop.body16:
// CHECK9-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
// CHECK9-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
// CHECK9-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
// CHECK9-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
// CHECK9-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
// CHECK9-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
// CHECK9-NEXT:    ]
// CHECK9:       omp_section_loop.inc17:
// CHECK9-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
// CHECK9:       omp_section_loop.exit18:
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
// CHECK9:       omp_section_loop.after19:
// CHECK9-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
// CHECK9:       omp_sections.end33:
// CHECK9-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK9-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
// CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK9-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK9-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK9-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
// CHECK9-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9:       omp.precond.then:
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK9-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK9-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK9-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK9-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
// CHECK9-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9:       cond.true:
// CHECK9-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK9-NEXT:    br label [[COND_END:%.*]]
// CHECK9:       cond.false:
// CHECK9-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    br label [[COND_END]]
// CHECK9:       cond.end:
// CHECK9-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
// CHECK9-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK9-NEXT:    store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9:       omp.inner.for.cond:
// CHECK9-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
// CHECK9-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9:       omp.inner.for.body:
// CHECK9-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
// CHECK9-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
// CHECK9-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
// CHECK9-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
// CHECK9-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
// CHECK9-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK9:       omp_if.then:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK9-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
// CHECK9-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK9-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK9:       .cancel.exit:
// CHECK9-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK9:       omp_section_loop.body.case:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
// CHECK9-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
// CHECK9-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
// CHECK9:       omp_section_loop.body.case.split:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK9:       omp_section_loop.body.case.cncl:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK9:       omp_section_loop.body.case23:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
// CHECK9-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
// CHECK9:       omp_section_loop.body.case23.split:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK9:       omp_section_loop.body.case23.cncl:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK9:       omp_section_loop.body.case25:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
// CHECK9-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
// CHECK9-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
// CHECK9:       omp_section_loop.body.case25.split:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK9:       omp_section_loop.body.case25.cncl:
// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK9:       .cancel.continue:
// CHECK9-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK9:       omp_if.else:
// CHECK9-NEXT:    br label [[OMP_IF_END]]
// CHECK9:       omp_if.end:
// CHECK9-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9:       omp.body.continue:
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9:       omp.inner.for.inc:
// CHECK9-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
// CHECK9-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK9:       omp.inner.for.end:
// CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9:       omp.loop.exit:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
// CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK9:       cancel.exit:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
// CHECK9-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK9:       omp.precond.end:
// CHECK9-NEXT:    br label [[CANCEL_CONT]]
// CHECK9:       cancel.cont:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
// CHECK9-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK9-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK9-NEXT:    [[TMP40:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK9-NEXT:    ret i32 [[TMP40]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@main..omp_par
// CHECK9-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK9-NEXT:  omp.par.entry:
// CHECK9-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
// CHECK9-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
// CHECK9-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
// CHECK9-NEXT:    br label [[OMP_PAR_REGION:%.*]]
// CHECK9:       omp.par.outlined.exit.exitStub:
// CHECK9-NEXT:    ret void
// CHECK9:       omp.par.region:
// CHECK9-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
// CHECK9-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
// CHECK9-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
// CHECK9:       2:
// CHECK9-NEXT:    br label [[TMP3:%.*]]
// CHECK9:       3:
// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK9-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
// CHECK9-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
// CHECK9-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK9-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
// CHECK9-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK9-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK9-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
// CHECK9:       .cncl5:
// CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
// CHECK9:       .cont:
// CHECK9-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK9-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK9-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
// CHECK9-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
// CHECK9-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
// CHECK9-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
// CHECK9-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// CHECK9-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
// CHECK9-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
// CHECK9-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
// CHECK9:       omp.par.pre_finalize:
// CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK9:       13:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
// CHECK9-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
// CHECK9-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
// CHECK9:       .cncl:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK9:       .split:
// CHECK9-NEXT:    br label [[TMP3]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK9-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK9-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK9-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK9-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK9-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK9-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK9-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK9-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK9-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK9-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK9-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK9-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK9-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK9-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK9-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK9-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK9-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK9-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK9-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK9-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK9-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]]
// CHECK9-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]]
// CHECK9-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK9-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK9:       .cancel.exit.i:
// CHECK9-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK9-NEXT:    br label [[DOTOMP_OUTLINED__EXIT:%.*]]
// CHECK9:       .cancel.continue.i:
// CHECK9-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK9-NEXT:    br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK9:       .omp_outlined..exit:
// CHECK9-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK9-NEXT:    ret i32 0
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB17:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
// CHECK9-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
// CHECK9-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK9-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9:       omp.inner.for.cond:
// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK9-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9:       omp.inner.for.body:
// CHECK9-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK9-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK9-NEXT:    ]
// CHECK9:       .omp.sections.case:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK9-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK9-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK9:       .omp.sections.case.split:
// CHECK9-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK9:       .omp.sections.case.cncl:
// CHECK9-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK9:       .omp.sections.exit:
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9:       omp.inner.for.inc:
// CHECK9-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK9-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK9:       omp.inner.for.end:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
// CHECK9-NEXT:    br label [[CANCEL_CONT]]
// CHECK9:       cancel.cont:
// CHECK9-NEXT:    ret void
// CHECK9:       cancel.exit:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK9-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1
// CHECK9-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1
// CHECK9-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK9-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9:       omp.inner.for.cond:
// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK9-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK9-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9:       omp.inner.for.body:
// CHECK9-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK9-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK9-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]]
// CHECK9-NEXT:    ]
// CHECK9:       .omp.sections.case:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK9-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK9-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK9:       .omp.sections.case.split:
// CHECK9-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK9:       .omp.sections.case.cncl:
// CHECK9-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK9:       .omp.sections.case2:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK9-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3)
// CHECK9-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
// CHECK9-NEXT:    br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]]
// CHECK9:       .omp.sections.case2.split:
// CHECK9-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK9:       .omp.sections.case2.cncl:
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_END]]
// CHECK9:       .omp.sections.exit:
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9:       omp.inner.for.inc:
// CHECK9-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK9-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK9:       omp.inner.for.end:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]])
// CHECK9-NEXT:    br label [[CANCEL_CONT]]
// CHECK9:       cancel.cont:
// CHECK9-NEXT:    ret void
// CHECK9:       cancel.exit:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK9-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR5]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK9-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK9-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK9-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK9-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK9-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK9-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK9-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK9-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK9-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9:       omp.precond.then:
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK9-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK9-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
// CHECK9-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9:       cond.true:
// CHECK9-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT:    br label [[COND_END:%.*]]
// CHECK9:       cond.false:
// CHECK9-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    br label [[COND_END]]
// CHECK9:       cond.end:
// CHECK9-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
// CHECK9-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK9-NEXT:    store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK9:       omp.inner.for.cond:
// CHECK9-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK9-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK9-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9:       omp.inner.for.body:
// CHECK9-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK9-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
// CHECK9-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2)
// CHECK9-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
// CHECK9-NEXT:    br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK9:       .cancel.exit:
// CHECK9-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK9:       .cancel.continue:
// CHECK9-NEXT:    [[TMP16:%.*]] = load i32, i32* [[I4]], align 4
// CHECK9-NEXT:    [[TMP17:%.*]] = load i32, i32* [[R3]], align 4
// CHECK9-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
// CHECK9-NEXT:    store i32 [[ADD8]], i32* [[R3]], align 4
// CHECK9-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9:       omp.body.continue:
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9:       omp.inner.for.inc:
// CHECK9-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK9-NEXT:    store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4
// CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK9:       omp.inner.for.end:
// CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9:       omp.loop.exit:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK9-NEXT:    store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK9-NEXT:    [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT:    switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK9-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK9-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK9-NEXT:    ]
// CHECK9:       .omp.reduction.case1:
// CHECK9-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK9-NEXT:    [[TMP24:%.*]] = load i32, i32* [[R3]], align 4
// CHECK9-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
// CHECK9-NEXT:    store i32 [[ADD13]], i32* [[TMP1]], align 4
// CHECK9-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9:       cancel.exit:
// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
// CHECK9-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK9:       .omp.reduction.case2:
// CHECK9-NEXT:    [[TMP25:%.*]] = load i32, i32* [[R3]], align 4
// CHECK9-NEXT:    [[TMP26:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP25]] monotonic, align 4
// CHECK9-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9:       .omp.reduction.default:
// CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK9:       omp.precond.end:
// CHECK9-NEXT:    br label [[CANCEL_CONT]]
// CHECK9:       cancel.cont:
// CHECK9-NEXT:    ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK9-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK9-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK9-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK9-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK9-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK9-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK9-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK9-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK9-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK9-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK9-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK9-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK9-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK9-NEXT:    ret void
//
//
// CHECK10-LABEL: define {{[^@]+}}@main
// CHECK10-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK10-NEXT:  entry:
// CHECK10-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
// CHECK10-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[I36:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK10-NEXT:    [[R:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK10-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK10-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK10-NEXT:    br label [[OMP_PARALLEL:%.*]]
// CHECK10:       omp_parallel:
// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
// CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
// CHECK10:       omp.par.outlined.exit:
// CHECK10-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// CHECK10:       omp.par.exit.split:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
// CHECK10:       omp_section_loop.preheader:
// CHECK10-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
// CHECK10-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
// CHECK10-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
// CHECK10-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
// CHECK10-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER:%.*]]
// CHECK10:       omp_section_loop.header:
// CHECK10-NEXT:    [[OMP_SECTION_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER]] ], [ [[OMP_SECTION_LOOP_NEXT:%.*]], [[OMP_SECTION_LOOP_INC:%.*]] ]
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_COND:%.*]]
// CHECK10:       omp_section_loop.cond:
// CHECK10-NEXT:    [[OMP_SECTION_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV]], [[TMP3]]
// CHECK10-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP]], label [[OMP_SECTION_LOOP_BODY:%.*]], label [[OMP_SECTION_LOOP_EXIT:%.*]]
// CHECK10:       omp_section_loop.body:
// CHECK10-NEXT:    [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]]
// CHECK10-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], 1
// CHECK10-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 0
// CHECK10-NEXT:    switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [
// CHECK10-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]]
// CHECK10-NEXT:    ]
// CHECK10:       omp_section_loop.inc:
// CHECK10-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
// CHECK10:       omp_section_loop.exit:
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
// CHECK10:       omp_section_loop.after:
// CHECK10-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
// CHECK10:       omp_sections.end:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
// CHECK10:       omp_section_loop.preheader13:
// CHECK10-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
// CHECK10-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
// CHECK10-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
// CHECK10-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
// CHECK10-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
// CHECK10:       omp_section_loop.header14:
// CHECK10-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
// CHECK10:       omp_section_loop.cond15:
// CHECK10-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
// CHECK10-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
// CHECK10:       omp_section_loop.body16:
// CHECK10-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
// CHECK10-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
// CHECK10-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
// CHECK10-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
// CHECK10-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
// CHECK10-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
// CHECK10-NEXT:    ]
// CHECK10:       omp_section_loop.inc17:
// CHECK10-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
// CHECK10:       omp_section_loop.exit18:
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
// CHECK10:       omp_section_loop.after19:
// CHECK10-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
// CHECK10:       omp_sections.end33:
// CHECK10-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK10-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK10-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
// CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK10-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK10-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK10-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK10-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
// CHECK10-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK10:       omp.precond.then:
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK10-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK10-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK10-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK10-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
// CHECK10-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK10:       cond.true:
// CHECK10-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
// CHECK10-NEXT:    br label [[COND_END:%.*]]
// CHECK10:       cond.false:
// CHECK10-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    br label [[COND_END]]
// CHECK10:       cond.end:
// CHECK10-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
// CHECK10-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK10-NEXT:    store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10:       omp.inner.for.cond:
// CHECK10-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
// CHECK10-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10:       omp.inner.for.body:
// CHECK10-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
// CHECK10-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
// CHECK10-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
// CHECK10-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
// CHECK10-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
// CHECK10-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK10:       omp_if.then:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK10-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
// CHECK10-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK10-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK10:       .cancel.exit:
// CHECK10-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK10:       omp_section_loop.body.case:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
// CHECK10-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
// CHECK10-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
// CHECK10:       omp_section_loop.body.case.split:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK10:       omp_section_loop.body.case.cncl:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
// CHECK10:       omp_section_loop.body.case23:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
// CHECK10-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
// CHECK10:       omp_section_loop.body.case23.split:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK10:       omp_section_loop.body.case23.cncl:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK10:       omp_section_loop.body.case25:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
// CHECK10-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
// CHECK10-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
// CHECK10:       omp_section_loop.body.case25.split:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK10:       omp_section_loop.body.case25.cncl:
// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
// CHECK10:       .cancel.continue:
// CHECK10-NEXT:    br label [[OMP_IF_END:%.*]]
// CHECK10:       omp_if.else:
// CHECK10-NEXT:    br label [[OMP_IF_END]]
// CHECK10:       omp_if.end:
// CHECK10-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10:       omp.body.continue:
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10:       omp.inner.for.inc:
// CHECK10-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
// CHECK10-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK10:       omp.inner.for.end:
// CHECK10-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10:       omp.loop.exit:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
// CHECK10-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK10:       cancel.exit:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
// CHECK10-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK10:       omp.precond.end:
// CHECK10-NEXT:    br label [[CANCEL_CONT]]
// CHECK10:       cancel.cont:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
// CHECK10-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
// CHECK10-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
// CHECK10-NEXT:    store i32 0, i32* [[R]], align 4
// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
// CHECK10-NEXT:    [[TMP40:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK10-NEXT:    ret i32 [[TMP40]]
//
//
// CHECK10-LABEL: define {{[^@]+}}@main..omp_par
// CHECK10-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK10-NEXT:  omp.par.entry:
// CHECK10-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
// CHECK10-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
// CHECK10-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
// CHECK10-NEXT:    br label [[OMP_PAR_REGION:%.*]]
// CHECK10:       omp.par.outlined.exit.exitStub:
// CHECK10-NEXT:    ret void
// CHECK10:       omp.par.region:
// CHECK10-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
// CHECK10-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
// CHECK10-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
// CHECK10:       2:
// CHECK10-NEXT:    br label [[TMP3:%.*]]
// CHECK10:       3:
// CHECK10-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK10-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
// CHECK10-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
// CHECK10-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
// CHECK10-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
// CHECK10-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK10-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK10-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
// CHECK10:       .cncl5:
// CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
// CHECK10:       .cont:
// CHECK10-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK10-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
// CHECK10-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
// CHECK10-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
// CHECK10-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
// CHECK10-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
// CHECK10-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// CHECK10-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
// CHECK10-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
// CHECK10-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
// CHECK10:       omp.par.pre_finalize:
// CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK10:       13:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
// CHECK10-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
// CHECK10-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
// CHECK10:       .cncl:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
// CHECK10:       .split:
// CHECK10-NEXT:    br label [[TMP3]]
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK10-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK10-NEXT:  entry:
// CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK10-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK10-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK10-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK10-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK10-NEXT:    store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK10-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK10-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK10-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK10-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK10-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK10-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK10-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK10-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK10-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK10-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
// CHECK10-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
// CHECK10-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
// CHECK10-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
// CHECK10-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
// CHECK10-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK10-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]]
// CHECK10-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]]
// CHECK10-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK10-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK10:       .cancel.exit.i:
// CHECK10-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK10-NEXT:    br label [[DOTOMP_OUTLINED__EXIT:%.*]]
// CHECK10:       .cancel.continue.i:
// CHECK10-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK10-NEXT:    br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK10:       .omp_outlined..exit:
// CHECK10-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
// CHECK10-NEXT:    ret i32 0
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK10-NEXT:  entry:
// CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB17:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK10-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
// CHECK10-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
// CHECK10-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK10-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10:       omp.inner.for.cond:
// CHECK10-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK10-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10:       omp.inner.for.body:
// CHECK10-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK10-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK10-NEXT:    ]
// CHECK10:       .omp.sections.case:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK10-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK10-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK10:       .omp.sections.case.split:
// CHECK10-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK10:       .omp.sections.case.cncl:
// CHECK10-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK10:       .omp.sections.exit:
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10:       omp.inner.for.inc:
// CHECK10-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK10-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK10:       omp.inner.for.end:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
// CHECK10-NEXT:    br label [[CANCEL_CONT]]
// CHECK10:       cancel.cont:
// CHECK10-NEXT:    ret void
// CHECK10:       cancel.exit:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
// CHECK10-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
// CHECK10-NEXT:  entry:
// CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK10-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1
// CHECK10-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1
// CHECK10-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK10-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10:       omp.inner.for.cond:
// CHECK10-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK10-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK10-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10:       omp.inner.for.body:
// CHECK10-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK10-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK10-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]]
// CHECK10-NEXT:    ]
// CHECK10:       .omp.sections.case:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
// CHECK10-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK10-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
// CHECK10:       .omp.sections.case.split:
// CHECK10-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK10:       .omp.sections.case.cncl:
// CHECK10-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK10:       .omp.sections.case2:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK10-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3)
// CHECK10-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
// CHECK10-NEXT:    br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]]
// CHECK10:       .omp.sections.case2.split:
// CHECK10-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK10:       .omp.sections.case2.cncl:
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_END]]
// CHECK10:       .omp.sections.exit:
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10:       omp.inner.for.inc:
// CHECK10-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK10-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK10:       omp.inner.for.end:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]])
// CHECK10-NEXT:    br label [[CANCEL_CONT]]
// CHECK10:       cancel.cont:
// CHECK10-NEXT:    ret void
// CHECK10:       cancel.exit:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
// CHECK10-NEXT:    br label [[CANCEL_CONT]]
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR5]] {
// CHECK10-NEXT:  entry:
// CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
// CHECK10-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[R3:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[I4:%.*]] = alloca i32, align 4
// CHECK10-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK10-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK10-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK10-NEXT:    store i32* [[R]], i32** [[R_ADDR]], align 8
// CHECK10-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK10-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
// CHECK10-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK10-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK10-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[I]], align 4
// CHECK10-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK10-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK10-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK10:       omp.precond.then:
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK10-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK10-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK10-NEXT:    store i32 0, i32* [[R3]], align 4
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK10-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK10-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
// CHECK10-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK10:       cond.true:
// CHECK10-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK10-NEXT:    br label [[COND_END:%.*]]
// CHECK10:       cond.false:
// CHECK10-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    br label [[COND_END]]
// CHECK10:       cond.end:
// CHECK10-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
// CHECK10-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK10-NEXT:    store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK10:       omp.inner.for.cond:
// CHECK10-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK10-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
// CHECK10-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK10:       omp.inner.for.body:
// CHECK10-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK10-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
// CHECK10-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2)
// CHECK10-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
// CHECK10-NEXT:    br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
// CHECK10:       .cancel.exit:
// CHECK10-NEXT:    br label [[CANCEL_EXIT:%.*]]
// CHECK10:       .cancel.continue:
// CHECK10-NEXT:    [[TMP16:%.*]] = load i32, i32* [[I4]], align 4
// CHECK10-NEXT:    [[TMP17:%.*]] = load i32, i32* [[R3]], align 4
// CHECK10-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
// CHECK10-NEXT:    store i32 [[ADD8]], i32* [[R3]], align 4
// CHECK10-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK10:       omp.body.continue:
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK10:       omp.inner.for.inc:
// CHECK10-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK10-NEXT:    store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4
// CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK10:       omp.inner.for.end:
// CHECK10-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK10:       omp.loop.exit:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i32* [[R3]] to i8*
// CHECK10-NEXT:    store i8* [[TMP20]], i8** [[TMP19]], align 8
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK10-NEXT:    [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK10-NEXT:    switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK10-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK10-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK10-NEXT:    ]
// CHECK10:       .omp.reduction.case1:
// CHECK10-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK10-NEXT:    [[TMP24:%.*]] = load i32, i32* [[R3]], align 4
// CHECK10-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
// CHECK10-NEXT:    store i32 [[ADD13]], i32* [[TMP1]], align 4
// CHECK10-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK10-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK10:       cancel.exit:
// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
// CHECK10-NEXT:    br label [[CANCEL_CONT:%.*]]
// CHECK10:       .omp.reduction.case2:
// CHECK10-NEXT:    [[TMP25:%.*]] = load i32, i32* [[R3]], align 4
// CHECK10-NEXT:    [[TMP26:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP25]] monotonic, align 4
// CHECK10-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK10:       .omp.reduction.default:
// CHECK10-NEXT:    br label [[OMP_PRECOND_END]]
// CHECK10:       omp.precond.end:
// CHECK10-NEXT:    br label [[CANCEL_CONT]]
// CHECK10:       cancel.cont:
// CHECK10-NEXT:    ret void
//
//
// CHECK10-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK10-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
// CHECK10-NEXT:  entry:
// CHECK10-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK10-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK10-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK10-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK10-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK10-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK10-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK10-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK10-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK10-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK10-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK10-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK10-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK10-NEXT:    ret void
//
